# clear environment
# Removes every non-hidden object from the calling environment so the script
# starts from a clean slate. Loaded packages and options are not affected.
rm(list = ls())

# load data set with keyword counts and crime numbers
# The path is relative to the current working directory.
dat <- read.csv(file = "police_metadata.csv")

# calculate difference in right-wing and left-wing keywords on issue level
# (right-wing count minus left-wing count, one value per row of dat)
dat$kw_diff <- with(dat, count_right_kw - count_left_kw)

# calculate difference in right-wing and left-wing crime
# (right-wing minus left-wing; NA whenever either crime figure is missing)
dat$extreme_crime_diff <- with(dat, extreme_crime_right - extreme_crime_left)

# impute missing values in crime difference with federal level crime difference
# Step 1: build a per-year lookup table from the federal ("bund") rows.
# which() is used instead of a bare logical index so that rows whose
# jurisdiction is NA are skipped rather than turned into all-NA phantom rows.
bund_dat <- dat[
  which(dat$jurisdiction == "bund"),
  c("year", "extreme_crime_right", "extreme_crime_left")
]
bund_dat$extreme_crime_diff_fed <-
  bund_dat$extreme_crime_right - bund_dat$extreme_crime_left
bund_dat <- bund_dat[, c("year", "extreme_crime_diff_fed")]

# Step 2: keep exactly one row per year. Rows with a non-missing federal value
# are sorted first (order() leaves ties in their original order), so a year is
# only left with NA when none of its federal rows has a value.
bund_dat <- bund_dat[order(is.na(bund_dat$extreme_crime_diff_fed)), ]
bund_dat <- bund_dat[!duplicated(bund_dat$year), ]

# Step 3: attach the federal value to every row of dat by year. all.x = TRUE
# keeps rows of dat whose year has no federal counterpart (they get NA).
# Note that merge() returns the rows sorted by year, not in the input order.
dat <- merge(dat, bund_dat, by = "year", all.x = TRUE)

# Step 4: use the row's own crime difference where available and fall back to
# the federal value for the same year otherwise.
dat$extreme_crime_diff_imp <- ifelse(
  is.na(dat$extreme_crime_diff),
  dat$extreme_crime_diff_fed,
  dat$extreme_crime_diff
)

# create bias measure
# Each difference is divided by its own standard deviation (computed with
# missing values ignored); the bias measure is the scaled keyword difference
# minus the scaled, imputed crime difference. Neither series is mean-centered.
dat$bias_diff_extreme_imp <- with(dat, {
  kw_scaled <- kw_diff / sd(kw_diff, na.rm = TRUE)
  crime_scaled <- extreme_crime_diff_imp /
    sd(extreme_crime_diff_imp, na.rm = TRUE)
  kw_scaled - crime_scaled
})

# change reference level for police unions
# "gdp" is listed first and therefore becomes the first factor level; any value
# other than "gdp" or "dpolg" becomes NA.
dat$union <- factor(dat$union, levels = c("gdp", "dpolg"))

## regression models
# NOTE: the fixed effects used below are section fixed effects, which are
# almost the same as jurisdictions. For simplicity, the paper refers to them as
# "jurisdiction fixed effects".
# Each outcome is fitted with three nested OLS specifications:
#   (a) union only, (b) union + section dummies, (c) union + section + year
#   dummies.
# `union` has "gdp" as its first level, so under R's default treatment
# contrasts the union coefficient compares dpolg against gdp.
# position: outcome is the right-minus-left keyword difference (kw_diff)
mod_1 <- lm(kw_diff~factor(union), data=dat)
mod_2 <- lm(kw_diff~factor(union)+factor(section), data=dat)
mod_3 <- lm(kw_diff~factor(union)+factor(section)+factor(year), data=dat)
# bias: outcome is the scaled keyword difference minus the scaled, imputed
# crime difference (bias_diff_extreme_imp)
mod_4 <- lm(bias_diff_extreme_imp~factor(union), data=dat)
mod_5 <- lm(bias_diff_extreme_imp~factor(union)+factor(section), data=dat)
mod_6 <- lm(bias_diff_extreme_imp~factor(union)+factor(section)+factor(year), data=dat)
